1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
library(TH.data)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

All variables are derived from a laser scanning image of the eye background taken by the Heidelberg Retina Tomograph. Most of the variables describe either the area or volume in certain parts of the papilla and are measured in four sectors (temporal, superior, nasal and inferior) as well as for the whole papilla (global). The global measurement is, roughly, the sum of the measurements taken in the four sector.

The observations in both groups are matched by age and sex to prevent any bias.

Source Torsten Hothorn and Berthold Lausen (2003), Double-Bagging: Combining classifiers by bootstrap aggregation. Pattern Recognition, 36(6), 1303–1309.

1.2 The Data

GlaucomaM {TH.data}


data("GlaucomaM")

pander::pander(table(GlaucomaM$Class))
glaucoma normal
98 98

GlaucomaM$Class <- 1*(GlaucomaM$Class=="glaucoma")

1.2.0.1 Standarize the names for the reporting

studyName <- "GlaucomaM"
dataframe <- GlaucomaM
outcome <- "Class"
thro <- 0.80
TopVariables <- 10
cexheat = 0.25

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
196 62
pander::pander(table(dataframe[,outcome]))
0 1
98 98

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9961105

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 62 , Uni p: 0.03074051 , Uncorrelated Base: 6 , Outcome-Driven Size: 0 , Base Size: 6 
#> 
#> 
 1 <R=0.996,r=0.973,N=    8>, Top: 2( 5 )[ 1 : 2 Fa= 2 : 0.973 ]( 2 , 6 , 0 ),<|>Tot Used: 8 , Added: 6 , Zero Std: 0 , Max Cor: 0.971
#> 
 2 <R=0.971,r=0.961,N=    8>, Top: 6( 1 )[ 1 : 6 Fa= 8 : 0.961 ]( 6 , 6 , 2 ),<|>Tot Used: 20 , Added: 6 , Zero Std: 0 , Max Cor: 0.958
#> 
 3 <R=0.958,r=0.954,N=    8>, Top: 1( 2 )[ 1 : 1 Fa= 9 : 0.954 ]( 1 , 2 , 8 ),<|>Tot Used: 23 , Added: 2 , Zero Std: 0 , Max Cor: 0.952
#> 
 4 <R=0.952,r=0.926,N=   17>, Top: 6( 3 )[ 1 : 6 Fa= 10 : 0.926 ]( 6 , 11 , 9 ),<|>Tot Used: 35 , Added: 11 , Zero Std: 0 , Max Cor: 0.920
#> 
 5 <R=0.920,r=0.910,N=   17>, Top: 4( 1 )[ 1 : 4 Fa= 10 : 0.910 ]( 4 , 6 , 10 ),<|>Tot Used: 38 , Added: 6 , Zero Std: 0 , Max Cor: 0.910
#> 
 6 <R=0.910,r=0.905,N=   17>, Top: 2( 1 )[ 1 : 2 Fa= 12 : 0.905 ]( 2 , 2 , 10 ),<|>Tot Used: 38 , Added: 2 , Zero Std: 0 , Max Cor: 0.980
#> 
 7 <R=0.980,r=0.890,N=    4>, Top: 2( 1 )[ 1 : 2 Fa= 13 : 0.890 ]( 2 , 2 , 12 ),<|>Tot Used: 38 , Added: 2 , Zero Std: 0 , Max Cor: 0.941
#> 
 8 <R=0.941,r=0.821,N=   30>, Top: 12( 1 )[ 1 : 12 Fa= 20 : 0.821 ]( 12 , 16 , 13 ),<|>Tot Used: 52 , Added: 16 , Zero Std: 0 , Max Cor: 0.819
#> 
 9 <R=0.819,r=0.800,N=   30>, Top: 1( 2 )[ 1 : 1 Fa= 20 : 0.800 ]( 1 , 2 , 20 ),<|>Tot Used: 52 , Added: 2 , Zero Std: 0 , Max Cor: 0.799
#> 
 10 <R=0.799,r=0.800,N=    0>
#> 
 [ 10 ], 0.798676 Decor Dimension: 52 Nused: 52 . Cor to Base: 23 , ABase: 3 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

3.03

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

1.04

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

3.94

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

2.88

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.799364

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")



univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
vari 0.04480 0.0368 0.1150 0.0543 0.1035 0.880
varg 0.17851 0.1170 0.4139 0.1967 0.0252 0.873
vars 0.04506 0.0314 0.1068 0.0596 0.0172 0.851
tmi 0.03664 0.1258 -0.1097 0.1038 0.6980 0.832
varn 0.08224 0.0595 0.1774 0.0894 0.2533 0.830
hic 0.39863 0.1407 0.2114 0.1574 0.7659 0.822
tmg -0.03356 0.0885 -0.1524 0.0927 0.5179 0.818
phcg -0.03546 0.0691 -0.1216 0.0661 0.9359 0.818
phci 0.00898 0.0882 -0.0937 0.0779 0.7638 0.814
tms 0.03959 0.1166 -0.1192 0.1376 0.9756 0.808


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
varg 0.17851 0.11700 0.41387 0.19669 0.025181 0.873
La_eag -0.35521 0.24722 -0.74028 0.39257 0.498588 0.826
tmg -0.03356 0.08846 -0.15241 0.09266 0.517931 0.818
phcg -0.03546 0.06909 -0.12159 0.06612 0.935907 0.818
La_vbrt -0.01055 0.00781 -0.01926 0.00971 0.217120 0.806
rnf 0.13956 0.06764 0.22520 0.09753 0.225229 0.800
phcn 0.00692 0.07364 -0.07168 0.08814 0.303322 0.796
vart 0.00640 0.00533 0.01460 0.01166 0.000754 0.777
La_abrg -0.45638 0.22776 -0.70578 0.33111 0.260305 0.774
mhcg 0.12197 0.05958 0.06633 0.06596 0.925585 0.756
La_vbri 0.00920 0.01487 -0.00246 0.01346 0.216854 0.745
La_hic -0.04138 0.07090 -0.09946 0.08204 0.545713 0.721

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.24 45 0.726


allSigvars <- names(dc)



dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
varg 0.17851 0.11700 0.41387 0.19669 2.52e-02 0.873 0.873 3
La_eag -0.929ag + 1.000eag -0.35521 0.24722 -0.74028 0.39257 4.99e-01 0.826 0.664 4
hic NA 0.39863 0.14073 0.21139 0.15743 7.66e-01 0.822 0.822 NA
tmg -0.03356 0.08846 -0.15241 0.09266 5.18e-01 0.818 0.818 2
phcg -0.03546 0.06909 -0.12159 0.06612 9.36e-01 0.818 0.818 NA
La_vbrt + 0.084vbsg -0.947vbst -0.090vbrg + 1.000vbrt -0.01055 0.00781 -0.01926 0.00971 2.17e-01 0.806 0.709 -2
vbri NA 0.14660 0.08893 0.06443 0.08073 2.88e-04 0.803 0.803 NA
rnf 0.13956 0.06764 0.22520 0.09753 2.25e-01 0.800 0.800 NA
phcn 0.00692 0.07364 -0.07168 0.08814 3.03e-01 0.796 0.796 NA
vart 0.00640 0.00533 0.01460 0.01166 7.54e-04 0.777 0.777 NA
La_abrg -0.999eag + 1.000abrg -0.45638 0.22776 -0.70578 0.33111 2.60e-01 0.774 0.758 2
vbrg NA 0.55871 0.35593 0.29249 0.43476 9.07e-06 0.771 0.771 NA
abrg NA 1.60782 0.64702 0.97601 0.78552 2.05e-01 0.758 0.758 NA
mhcg 0.12197 0.05958 0.06633 0.06596 9.26e-01 0.756 0.756 3
emd NA 0.36204 0.12231 0.25577 0.11134 7.61e-01 0.746 0.746 5
La_vbri + 0.189vbsg -0.799vbsi -0.213vbrg + 1.000vbri 0.00920 0.01487 -0.00246 0.01346 2.17e-01 0.745 0.803 -2
vbsi NA 0.20517 0.09529 0.12323 0.09254 6.57e-02 0.742 0.742 NA
vbsg NA 0.77012 0.37208 0.49666 0.40027 1.38e-01 0.724 0.724 NA
La_hic + 1.000hic -1.215emd -0.04138 0.07090 -0.09946 0.08204 5.46e-01 0.721 0.822 -1
vbrt NA 0.12276 0.08051 0.07163 0.06635 4.26e-02 0.709 0.709 NA
eag NA 2.06546 0.57098 1.68282 0.80355 4.00e-01 0.664 0.664 NA
vbst NA 0.15583 0.08263 0.11215 0.07113 4.71e-01 0.662 0.662 NA
ag NA 2.60522 0.53920 2.60784 0.76445 2.79e-01 0.477 0.477 6

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 83 15
1 11 87
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.520 0.4481 0.592
    tp 0.500 0.4279 0.572
    se 0.888 0.8080 0.943
    sp 0.847 0.7601 0.912
    diag.ac 0.867 0.8117 0.911
    diag.or 43.764 19.0046 100.778
    nndx 1.361 1.1705 1.760
    youden 0.735 0.5682 0.854
    pv.pos 0.853 0.7691 0.915
    pv.neg 0.883 0.8003 0.940
    lr.pos 5.800 3.6213 9.289
    lr.neg 0.133 0.0755 0.233
    p.rout 0.480 0.4079 0.552
    p.rin 0.520 0.4481 0.592
    p.tpdn 0.153 0.0883 0.240
    p.tndp 0.112 0.0574 0.192
    p.dntp 0.147 0.0847 0.231
    p.dptn 0.117 0.0599 0.200
  • tab:

      Outcome + Outcome - Total
    Test + 87 15 102
    Test - 11 83 94
    Total 98 98 196
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.867 0.812 0.911
3 se 0.888 0.808 0.943
4 sp 0.847 0.760 0.912
6 diag.or 43.764 19.005 100.778

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 85 13
1 11 87
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.510 0.4380 0.582
    tp 0.500 0.4279 0.572
    se 0.888 0.8080 0.943
    sp 0.867 0.7838 0.927
    diag.ac 0.878 0.8233 0.920
    diag.or 51.713 21.9537 121.814
    nndx 1.324 1.1494 1.690
    youden 0.755 0.5919 0.870
    pv.pos 0.870 0.7880 0.929
    pv.neg 0.885 0.8042 0.941
    lr.pos 6.692 4.0142 11.157
    lr.neg 0.129 0.0738 0.227
    p.rout 0.490 0.4179 0.562
    p.rin 0.510 0.4380 0.582
    p.tpdn 0.133 0.0726 0.216
    p.tndp 0.112 0.0574 0.192
    p.dntp 0.130 0.0711 0.212
    p.dptn 0.115 0.0586 0.196
  • tab:

      Outcome + Outcome - Total
    Test + 87 13 100
    Test - 11 85 96
    Total 98 98 196
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.878 0.823 0.920
3 se 0.888 0.808 0.943
4 sp 0.867 0.784 0.927
6 diag.or 51.713 21.954 121.814

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 77 21
1 8 90
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.5663 0.4938 0.637
    tp 0.5000 0.4279 0.572
    se 0.9184 0.8455 0.964
    sp 0.7857 0.6913 0.862
    diag.ac 0.8520 0.7945 0.899
    diag.or 41.2500 17.2939 98.391
    nndx 1.4203 1.2102 1.863
    youden 0.7041 0.5368 0.826
    pv.pos 0.8108 0.7255 0.879
    pv.neg 0.9059 0.8229 0.958
    lr.pos 4.2857 2.9201 6.290
    lr.neg 0.1039 0.0531 0.203
    p.rout 0.4337 0.3632 0.506
    p.rin 0.5663 0.4938 0.637
    p.tpdn 0.2143 0.1378 0.309
    p.tndp 0.0816 0.0359 0.155
    p.dntp 0.1892 0.1211 0.275
    p.dptn 0.0941 0.0415 0.177
  • tab:

      Outcome + Outcome - Total
    Test + 90 21 111
    Test - 8 77 85
    Total 98 98 196
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.852 0.794 0.899
3 se 0.918 0.845 0.964
4 sp 0.786 0.691 0.862
6 diag.or 41.250 17.294 98.391


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 79 19
1 10 88
  pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.546 0.4734 0.617
    tp 0.500 0.4279 0.572
    se 0.898 0.8203 0.950
    sp 0.806 0.7139 0.879
    diag.ac 0.852 0.7945 0.899
    diag.or 36.589 16.0544 83.391
    nndx 1.420 1.2062 1.872
    youden 0.704 0.5343 0.829
    pv.pos 0.822 0.7367 0.890
    pv.neg 0.888 0.8031 0.945
    lr.pos 4.632 3.0762 6.973
    lr.neg 0.127 0.0698 0.230
    p.rout 0.454 0.3830 0.527
    p.rin 0.546 0.4734 0.617
    p.tpdn 0.194 0.1210 0.286
    p.tndp 0.102 0.0500 0.180
    p.dntp 0.178 0.1104 0.263
    p.dptn 0.112 0.0552 0.197
  • tab:

      Outcome + Outcome - Total
    Test + 88 19 107
    Test - 10 79 89
    Total 98 98 196
  • method: exact

  • digits: 2

  • conf.level: 0.95

  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.852 0.794 0.899
3 se 0.898 0.820 0.950
4 sp 0.806 0.714 0.879
6 diag.or 36.589 16.054 83.391
  par(op)